# Load packages

install.packages("chinese.misc")
install.packages("stm")
install.packages("Cairo")
require(chinese.misc)
require(jiebaR)
require(jiebaRD)
require(stm)
require(tidyverse)
require(wordcloud)
require(igraph)
require(Cairo)

#------------------------------------------------------------------------------------------
## Method 6. Structural Topic Model (STM)
#------------------------------------------------------------------------------------------

# Load data

# --- Mobilization campaigns: one CSV file per campaign ---
Mob_India_1962 <- read_csv("mobilization_campaign_India_1962.csv")
Mob_Soviet_1969 <- read_csv("mobilization_campaign_Soviet_1969.csv")
Mob_Vietnam_1974 <- read_csv("mobilization_campaign_Vietnam_1974.csv")
# Only the first seven columns of the Vietnam 1979 file are kept
# (presumably so its layout matches the other files for rbind() -- confirm).
Mob_Vietnam_1979 <- read_csv("mobilization_campaign_Vietnam_1979.csv")[, 1:7]

# --- Pacification campaigns: one CSV file per campaign ---
Pac_Japan_1990 <- read_csv("pacification_campaign_Japan_1990.csv")
Pac_Japan_1996 <- read_csv("pacification_campaign_Japan_1996.csv")
Pac_Japan_2005 <- read_csv("pacification_campaign_Japan_2005.csv")
Pac_Japan_2010 <- read_csv("pacification_campaign_Japan_2010.csv")
Pac_Japan_2012 <- read_csv("pacification_campaign_Japan_2012.csv")
Pac_Philippines_2016 <- read_csv("pacification_campaign_Philippines_2016.csv")
Pac_India_2017 <- read_csv("pacification_campaign_India_2017.csv")

# Stack the campaigns of each type row-wise and pull out the raw text column.
Mob_Combined <- do.call(
  rbind,
  list(Mob_India_1962, Mob_Soviet_1969, Mob_Vietnam_1974, Mob_Vietnam_1979)
)
Mob_Ori <- Mob_Combined$Text

Pac_Combined <- do.call(
  rbind,
  list(
    Pac_Japan_1990, Pac_Japan_1996, Pac_Japan_2005, Pac_Japan_2010,
    Pac_Japan_2012, Pac_Philippines_2016, Pac_India_2017
  )
)
Pac_Ori <- Pac_Combined$Text

# Segmentation worker created with worker()'s default settings.
wk <- worker()

# ==================== Figure 7.11 =====================================================

# India 1962
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_India1962 <- seg_file(Mob_India_1962$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_India1962 <- vapply(
  Mob_India_1962$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
India1962_date_tag <- as.Date(
  paste(Mob_India_1962$Year, Mob_India_1962$Month, Mob_India_1962$Day,
        sep = "-")
)
India1962_Data <- data.frame(India1962_date_tag, Stopword_Removed_India1962)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_India1962 <- textProcessor(
  India1962_Data$Stopword_Removed_India1962,
  metadata = India1962_Data
)
out_India1962 <- prepDocuments(
  processed_India1962$documents,
  processed_India1962$vocab,
  processed_India1962$meta
)
docs_India1962 <- out_India1962$documents
vocab_India1962 <- out_India1962$vocab
meta_India1962 <- out_India1962$meta

# Compare candidate numbers of topics.
storage_India1962 <- searchK(
  documents = out_India1962$documents,
  vocab = out_India1962$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(India1962_date_tag),
  data = out_India1962$meta
)
storage_India1962$results # print result

# Fit the model with K = 10; topic prevalence is a smooth function of date.
poliblogPrevFit_India1962 <- stm(
  documents = out_India1962$documents,
  vocab = out_India1962$vocab,
  K = 10,
  prevalence = ~ s(India1962_date_tag),
  max.em.its = 75,
  data = out_India1962$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_India1962, type = "summary", n = 5,
         xlim = c(0, 0.6), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_India1962.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_India1962, type = "summary", n = 5,
         xlim = c(0, 0.6))
dev.off()

# Word cloud for topic 8 on the active graphics device.
cloud(poliblogPrevFit_India1962, topic = 8, family = "Microsoft YaHei",
      scale = c(2.5, .5))

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(8L, 3L, 2L, 5L, 4L)) {
  pdf(sprintf("WordCloud_India1962_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_India1962, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_India1962)
plot(topic_corr)

# Soviet 1969
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Soviet1969 <- seg_file(Mob_Soviet_1969$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Soviet1969 <- vapply(
  Mob_Soviet_1969$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Soviet1969_date_tag <- as.Date(
  paste(Mob_Soviet_1969$Year, Mob_Soviet_1969$Month, Mob_Soviet_1969$Day,
        sep = "-")
)
Soviet1969_Data <- data.frame(Soviet1969_date_tag, Stopword_Removed_Soviet1969)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Soviet1969 <- textProcessor(
  Soviet1969_Data$Stopword_Removed_Soviet1969,
  metadata = Soviet1969_Data
)
out_Soviet1969 <- prepDocuments(
  processed_Soviet1969$documents,
  processed_Soviet1969$vocab,
  processed_Soviet1969$meta
)
docs_Soviet1969 <- out_Soviet1969$documents
vocab_Soviet1969 <- out_Soviet1969$vocab
meta_Soviet1969 <- out_Soviet1969$meta

# Compare candidate numbers of topics.
storage_Soviet1969 <- searchK(
  documents = out_Soviet1969$documents,
  vocab = out_Soviet1969$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Soviet1969_date_tag),
  data = out_Soviet1969$meta
)
storage_Soviet1969$results # print result

# Fit the model with K = 10; topic prevalence is a smooth function of date.
poliblogPrevFit_Soviet1969 <- stm(
  documents = out_Soviet1969$documents,
  vocab = out_Soviet1969$vocab,
  K = 10,
  prevalence = ~ s(Soviet1969_date_tag),
  max.em.its = 75,
  data = out_Soviet1969$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Soviet1969, type = "summary", n = 5,
         xlim = c(0, 0.6), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Soviet1969.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Soviet1969, type = "summary", n = 5,
         xlim = c(0, 0.6))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(5L, 7L, 2L, 10L, 8L)) {
  pdf(sprintf("WordCloud_Soviet1969_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Soviet1969, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Soviet1969)
plot(topic_corr)

# Vietnam 1974
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Vietnam1974 <- seg_file(Mob_Vietnam_1974$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Vietnam1974 <- vapply(
  Mob_Vietnam_1974$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Vietnam1974_date_tag <- as.Date(
  paste(Mob_Vietnam_1974$Year, Mob_Vietnam_1974$Month, Mob_Vietnam_1974$Day,
        sep = "-")
)
Vietnam1974_Data <- data.frame(
  Vietnam1974_date_tag,
  Stopword_Removed_Vietnam1974
)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Vietnam1974 <- textProcessor(
  Vietnam1974_Data$Stopword_Removed_Vietnam1974,
  metadata = Vietnam1974_Data
)
out_Vietnam1974 <- prepDocuments(
  processed_Vietnam1974$documents,
  processed_Vietnam1974$vocab,
  processed_Vietnam1974$meta
)
docs_Vietnam1974 <- out_Vietnam1974$documents
vocab_Vietnam1974 <- out_Vietnam1974$vocab
meta_Vietnam1974 <- out_Vietnam1974$meta

# Compare candidate numbers of topics.
storage_Vietnam1974 <- searchK(
  documents = out_Vietnam1974$documents,
  vocab = out_Vietnam1974$vocab,
  K = c(3, 5),
  prevalence = ~ s(Vietnam1974_date_tag),
  data = out_Vietnam1974$meta
)
storage_Vietnam1974$results # print result

# Fit the model with K = 3; topic prevalence is a smooth function of date.
poliblogPrevFit_Vietnam1974 <- stm(
  documents = out_Vietnam1974$documents,
  vocab = out_Vietnam1974$vocab,
  K = 3,
  prevalence = ~ s(Vietnam1974_date_tag),
  max.em.its = 75,
  data = out_Vietnam1974$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
# NOTE(review): xlim is c(0, 2) here but c(0, 0.6) in the PDF below -- confirm
# the difference is intentional.
plot.STM(poliblogPrevFit_Vietnam1974, type = "summary", n = 5,
         xlim = c(0, 2), family = "Microsoft YaHei") # plot STM result

# Topic summary written to PDF.
pdf("Topics_Vietnam1974.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Vietnam1974, type = "summary", n = 5,
         xlim = c(0, 0.6))
dev.off()

# One word-cloud PDF per topic (same files and order as before).
for (topic_id in c(3L, 2L, 1L)) {
  pdf(sprintf("WordCloud_Vietnam1974_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Vietnam1974, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Vietnam1974)
plot(topic_corr)

# Vietnam 1979
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Vietnam1979 <- seg_file(Mob_Vietnam_1979$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Vietnam1979 <- vapply(
  Mob_Vietnam_1979$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Vietnam1979_date_tag <- as.Date(
  paste(Mob_Vietnam_1979$Year, Mob_Vietnam_1979$Month, Mob_Vietnam_1979$Day,
        sep = "-")
)
Vietnam1979_Data <- data.frame(
  Vietnam1979_date_tag,
  Stopword_Removed_Vietnam1979
)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Vietnam1979 <- textProcessor(
  Vietnam1979_Data$Stopword_Removed_Vietnam1979,
  metadata = Vietnam1979_Data
)
out_Vietnam1979 <- prepDocuments(
  processed_Vietnam1979$documents,
  processed_Vietnam1979$vocab,
  processed_Vietnam1979$meta
)
docs_Vietnam1979 <- out_Vietnam1979$documents
vocab_Vietnam1979 <- out_Vietnam1979$vocab
meta_Vietnam1979 <- out_Vietnam1979$meta

# Compare candidate numbers of topics.
storage_Vietnam1979 <- searchK(
  documents = out_Vietnam1979$documents,
  vocab = out_Vietnam1979$vocab,
  K = c(10, 15, 20),
  prevalence = ~ s(Vietnam1979_date_tag),
  data = out_Vietnam1979$meta
)
storage_Vietnam1979$results # print result

# Fit the model with K = 15; topic prevalence is a smooth function of date.
poliblogPrevFit_Vietnam1979 <- stm(
  documents = out_Vietnam1979$documents,
  vocab = out_Vietnam1979$vocab,
  K = 15,
  prevalence = ~ s(Vietnam1979_date_tag),
  max.em.its = 75,
  data = out_Vietnam1979$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Vietnam1979, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Vietnam1979.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Vietnam1979, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(8L, 10L, 13L, 14L, 1L)) {
  pdf(sprintf("WordCloud_Vietnam1979_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Vietnam1979, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Vietnam1979)
plot(topic_corr)

# ==================== Figure 7.12 =====================================================

# Japan 1990
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Japan1990 <- seg_file(Pac_Japan_1990$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Japan1990 <- vapply(
  Pac_Japan_1990$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Japan1990_date_tag <- as.Date(
  paste(Pac_Japan_1990$Year, Pac_Japan_1990$Month, Pac_Japan_1990$Day,
        sep = "-")
)
Japan1990_Data <- data.frame(Japan1990_date_tag, Stopword_Removed_Japan1990)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Japan1990 <- textProcessor(
  Japan1990_Data$Stopword_Removed_Japan1990,
  metadata = Japan1990_Data
)
out_Japan1990 <- prepDocuments(
  processed_Japan1990$documents,
  processed_Japan1990$vocab,
  processed_Japan1990$meta
)
docs_Japan1990 <- out_Japan1990$documents
vocab_Japan1990 <- out_Japan1990$vocab
meta_Japan1990 <- out_Japan1990$meta

# Compare candidate numbers of topics.
storage_Japan1990 <- searchK(
  documents = out_Japan1990$documents,
  vocab = out_Japan1990$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Japan1990_date_tag),
  data = out_Japan1990$meta
)
storage_Japan1990$results # print result

# Fit the model with K = 5; topic prevalence is a smooth function of date.
poliblogPrevFit_Japan1990 <- stm(
  documents = out_Japan1990$documents,
  vocab = out_Japan1990$vocab,
  K = 5,
  prevalence = ~ s(Japan1990_date_tag),
  max.em.its = 75,
  data = out_Japan1990$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Japan1990, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Japan1990.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Japan1990, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per topic (same files and order as before).
for (topic_id in c(5L, 1L, 2L, 4L, 3L)) {
  pdf(sprintf("WordCloud_Japan1990_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Japan1990, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Japan1990)
plot(topic_corr)

# Japan 1996
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Japan1996 <- seg_file(Pac_Japan_1996$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Japan1996 <- vapply(
  Pac_Japan_1996$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Japan1996_date_tag <- as.Date(
  paste(Pac_Japan_1996$Year, Pac_Japan_1996$Month, Pac_Japan_1996$Day,
        sep = "-")
)
Japan1996_Data <- data.frame(Japan1996_date_tag, Stopword_Removed_Japan1996)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Japan1996 <- textProcessor(
  Japan1996_Data$Stopword_Removed_Japan1996,
  metadata = Japan1996_Data
)
out_Japan1996 <- prepDocuments(
  processed_Japan1996$documents,
  processed_Japan1996$vocab,
  processed_Japan1996$meta
)
docs_Japan1996 <- out_Japan1996$documents
vocab_Japan1996 <- out_Japan1996$vocab
meta_Japan1996 <- out_Japan1996$meta

# Compare candidate numbers of topics.
storage_Japan1996 <- searchK(
  documents = out_Japan1996$documents,
  vocab = out_Japan1996$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Japan1996_date_tag),
  data = out_Japan1996$meta
)
storage_Japan1996$results # print result

# Fit the model with K = 7; topic prevalence is a smooth function of date.
poliblogPrevFit_Japan1996 <- stm(
  documents = out_Japan1996$documents,
  vocab = out_Japan1996$vocab,
  K = 7,
  prevalence = ~ s(Japan1996_date_tag),
  max.em.its = 75,
  data = out_Japan1996$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Japan1996, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Japan1996.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Japan1996, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(1L, 5L, 4L, 2L, 7L)) {
  pdf(sprintf("WordCloud_Japan1996_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Japan1996, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Japan1996)
plot(topic_corr)

# Japan 2005
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Japan2005 <- seg_file(Pac_Japan_2005$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Japan2005 <- vapply(
  Pac_Japan_2005$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Japan2005_date_tag <- as.Date(
  paste(Pac_Japan_2005$Year, Pac_Japan_2005$Month, Pac_Japan_2005$Day,
        sep = "-")
)
Japan2005_Data <- data.frame(Japan2005_date_tag, Stopword_Removed_Japan2005)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Japan2005 <- textProcessor(
  Japan2005_Data$Stopword_Removed_Japan2005,
  metadata = Japan2005_Data
)
out_Japan2005 <- prepDocuments(
  processed_Japan2005$documents,
  processed_Japan2005$vocab,
  processed_Japan2005$meta
)
docs_Japan2005 <- out_Japan2005$documents
vocab_Japan2005 <- out_Japan2005$vocab
meta_Japan2005 <- out_Japan2005$meta

# Compare candidate numbers of topics.
storage_Japan2005 <- searchK(
  documents = out_Japan2005$documents,
  vocab = out_Japan2005$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Japan2005_date_tag),
  data = out_Japan2005$meta
)
storage_Japan2005$results # print result

# Fit the model with K = 7; topic prevalence is a smooth function of date.
poliblogPrevFit_Japan2005 <- stm(
  documents = out_Japan2005$documents,
  vocab = out_Japan2005$vocab,
  K = 7,
  prevalence = ~ s(Japan2005_date_tag),
  max.em.its = 75,
  data = out_Japan2005$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Japan2005, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Japan2005.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Japan2005, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(4L, 7L, 5L, 3L, 6L)) {
  pdf(sprintf("WordCloud_Japan2005_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Japan2005, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Japan2005)
plot(topic_corr)

# Japan 2010
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Japan2010 <- seg_file(Pac_Japan_2010$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Japan2010 <- vapply(
  Pac_Japan_2010$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Japan2010_date_tag <- as.Date(
  paste(Pac_Japan_2010$Year, Pac_Japan_2010$Month, Pac_Japan_2010$Day,
        sep = "-")
)
Japan2010_Data <- data.frame(Japan2010_date_tag, Stopword_Removed_Japan2010)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Japan2010 <- textProcessor(
  Japan2010_Data$Stopword_Removed_Japan2010,
  metadata = Japan2010_Data
)
out_Japan2010 <- prepDocuments(
  processed_Japan2010$documents,
  processed_Japan2010$vocab,
  processed_Japan2010$meta
)
docs_Japan2010 <- out_Japan2010$documents
vocab_Japan2010 <- out_Japan2010$vocab
meta_Japan2010 <- out_Japan2010$meta

# Compare candidate numbers of topics.
storage_Japan2010 <- searchK(
  documents = out_Japan2010$documents,
  vocab = out_Japan2010$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Japan2010_date_tag),
  data = out_Japan2010$meta
)
storage_Japan2010$results # print result

# Fit the model with K = 5; topic prevalence is a smooth function of date.
poliblogPrevFit_Japan2010 <- stm(
  documents = out_Japan2010$documents,
  vocab = out_Japan2010$vocab,
  K = 5,
  prevalence = ~ s(Japan2010_date_tag),
  max.em.its = 75,
  data = out_Japan2010$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Japan2010, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Japan2010.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Japan2010, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per topic (same files and order as before).
for (topic_id in c(5L, 4L, 1L, 3L, 2L)) {
  pdf(sprintf("WordCloud_Japan2010_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Japan2010, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Japan2010)
plot(topic_corr)

# Japan 2012
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Japan2012 <- seg_file(Pac_Japan_2012$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Japan2012 <- vapply(
  Pac_Japan_2012$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Japan2012_date_tag <- as.Date(
  paste(Pac_Japan_2012$Year, Pac_Japan_2012$Month, Pac_Japan_2012$Day,
        sep = "-")
)
Japan2012_Data <- data.frame(Japan2012_date_tag, Stopword_Removed_Japan2012)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Japan2012 <- textProcessor(
  Japan2012_Data$Stopword_Removed_Japan2012,
  metadata = Japan2012_Data
)
out_Japan2012 <- prepDocuments(
  processed_Japan2012$documents,
  processed_Japan2012$vocab,
  processed_Japan2012$meta
)
docs_Japan2012 <- out_Japan2012$documents
vocab_Japan2012 <- out_Japan2012$vocab
meta_Japan2012 <- out_Japan2012$meta

# Compare candidate numbers of topics.
storage_Japan2012 <- searchK(
  documents = out_Japan2012$documents,
  vocab = out_Japan2012$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Japan2012_date_tag),
  data = out_Japan2012$meta
)
storage_Japan2012$results # print result

# Fit the model with K = 7; topic prevalence is a smooth function of date.
poliblogPrevFit_Japan2012 <- stm(
  documents = out_Japan2012$documents,
  vocab = out_Japan2012$vocab,
  K = 7,
  prevalence = ~ s(Japan2012_date_tag),
  max.em.its = 75,
  data = out_Japan2012$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Japan2012, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Japan2012.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Japan2012, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(7L, 4L, 2L, 5L, 1L)) {
  pdf(sprintf("WordCloud_Japan2012_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_Japan2012, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Japan2012)
plot(topic_corr)

# Philippines 2016
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_Philippines2016 <- seg_file(Pac_Philippines_2016$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_Philippines2016 <- vapply(
  Pac_Philippines_2016$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
Philippines2016_date_tag <- as.Date(
  paste(Pac_Philippines_2016$Year, Pac_Philippines_2016$Month,
        Pac_Philippines_2016$Day, sep = "-")
)
Philippines2016_Data <- data.frame(
  Philippines2016_date_tag,
  Stopword_Removed_Philippines2016
)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_Philippines2016 <- textProcessor(
  Philippines2016_Data$Stopword_Removed_Philippines2016,
  metadata = Philippines2016_Data
)
out_Philippines2016 <- prepDocuments(
  processed_Philippines2016$documents,
  processed_Philippines2016$vocab,
  processed_Philippines2016$meta
)
docs_Philippines2016 <- out_Philippines2016$documents
vocab_Philippines2016 <- out_Philippines2016$vocab
meta_Philippines2016 <- out_Philippines2016$meta

# Compare candidate numbers of topics.
storage_Philippines2016 <- searchK(
  documents = out_Philippines2016$documents,
  vocab = out_Philippines2016$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(Philippines2016_date_tag),
  data = out_Philippines2016$meta
)
storage_Philippines2016$results # print result

# Fit the model with K = 10; topic prevalence is a smooth function of date.
# The covariate is taken from `data` (the metadata returned by
# prepDocuments()), not from the unfiltered Philippines2016_Data frame, so it
# stays aligned with the documents even if preprocessing dropped some of them.
poliblogPrevFit_Philippines2016 <- stm(
  documents = out_Philippines2016$documents,
  vocab = out_Philippines2016$vocab,
  K = 10,
  prevalence = ~ s(Philippines2016_date_tag),
  max.em.its = 75,
  data = out_Philippines2016$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_Philippines2016, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_Philippines2016.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_Philippines2016, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per selected topic (same files and order as before).
for (topic_id in c(4L, 2L, 8L, 5L, 9L)) {
  pdf(sprintf("WordCloud_Philippines2016_Topic%d.pdf", topic_id),
      family = "GB1")
  cloud(poliblogPrevFit_Philippines2016, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_Philippines2016)
plot(topic_corr)

# India 2017
# Word segmentation of the raw text; the result is not referenced again in
# this section.
Origin_token_India2017 <- seg_file(Pac_India_2017$Text, from = "v")

# Clean every document with slim_text() (rm_place = FALSE). vapply() replaces
# the former `for (i in 1:length(x))` loop: it does not grow a vector element
# by element, and it returns character(0) instead of iterating over c(1, 0)
# when there are no documents.
Stopword_Removed_India2017 <- vapply(
  Pac_India_2017$Text, slim_text, character(1),
  rm_place = FALSE, USE.NAMES = FALSE
)

# One date per document, built from the Year / Month / Day columns.
India2017_date_tag <- as.Date(
  paste(Pac_India_2017$Year, Pac_India_2017$Month, Pac_India_2017$Day,
        sep = "-")
)
India2017_Data <- data.frame(India2017_date_tag, Stopword_Removed_India2017)

# Build the STM corpus (documents, vocabulary, metadata).
# NOTE(review): textProcessor() runs with its defaults, which target English
# text -- confirm they are appropriate for this corpus.
processed_India2017 <- textProcessor(
  India2017_Data$Stopword_Removed_India2017,
  metadata = India2017_Data
)
out_India2017 <- prepDocuments(
  processed_India2017$documents,
  processed_India2017$vocab,
  processed_India2017$meta
)
docs_India2017 <- out_India2017$documents
vocab_India2017 <- out_India2017$vocab
meta_India2017 <- out_India2017$meta

# Compare candidate numbers of topics.
storage_India2017 <- searchK(
  documents = out_India2017$documents,
  vocab = out_India2017$vocab,
  K = c(5, 7, 10),
  prevalence = ~ s(India2017_date_tag),
  data = out_India2017$meta
)
storage_India2017$results # print result

# Fit the model with K = 5; topic prevalence is a smooth function of date.
poliblogPrevFit_India2017 <- stm(
  documents = out_India2017$documents,
  vocab = out_India2017$vocab,
  K = 5,
  prevalence = ~ s(India2017_date_tag),
  max.em.its = 75,
  data = out_India2017$meta,
  seed = 1111
)

# Topic summary on the active graphics device.
plot.STM(poliblogPrevFit_India2017, type = "summary", n = 5,
         xlim = c(0, 1), family = "Microsoft YaHei") # plot STM result

# Same summary written to PDF.
pdf("Topics_India2017.pdf", family = "GB1", width = 9)
plot.STM(poliblogPrevFit_India2017, type = "summary", n = 5,
         xlim = c(0, 1))
dev.off()

# One word-cloud PDF per topic (same files and order as before).
for (topic_id in c(2L, 4L, 1L, 3L, 5L)) {
  pdf(sprintf("WordCloud_India2017_Topic%d.pdf", topic_id), family = "GB1")
  cloud(poliblogPrevFit_India2017, topic = topic_id, scale = c(2.5, .5))
  dev.off()
}

# Topic correlation plot.
topic_corr <- topicCorr(poliblogPrevFit_India2017)
plot(topic_corr)